{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e03cfbe6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: sagemaker>=2.48.0 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (2.66.0)\n",
      "Collecting sagemaker>=2.48.0\n",
      "  Downloading sagemaker-2.68.0.tar.gz (452 kB)\n",
      "\u001B[K     |████████████████████████████████| 452 kB 7.3 MB/s eta 0:00:01\n",
      "\u001B[?25hCollecting transformers==4.6.1\n",
      "  Downloading transformers-4.6.1-py3-none-any.whl (2.2 MB)\n",
      "\u001B[K     |████████████████████████████████| 2.2 MB 49.2 MB/s eta 0:00:01\n",
      "\u001B[?25hCollecting datasets[s3]==1.6.2\n",
      "  Downloading datasets-1.6.2-py3-none-any.whl (221 kB)\n",
      "\u001B[K     |████████████████████████████████| 221 kB 74.0 MB/s eta 0:00:01\n",
      "\u001B[?25hCollecting tokenizers<0.11,>=0.10.1\n",
      "  Downloading tokenizers-0.10.3-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)\n",
      "\u001B[K     |████████████████████████████████| 3.3 MB 21.1 MB/s eta 0:00:01\n",
      "\u001B[?25hRequirement already satisfied: regex!=2019.12.17 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from transformers==4.6.1) (2020.11.13)\n",
      "Requirement already satisfied: importlib-metadata in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from transformers==4.6.1) (4.8.1)\n",
      "Collecting filelock\n",
      "  Downloading filelock-3.3.2-py3-none-any.whl (9.7 kB)\n",
      "Collecting sacremoses\n",
      "  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)\n",
      "\u001B[K     |████████████████████████████████| 895 kB 36.6 MB/s eta 0:00:01\n",
      "\u001B[?25hRequirement already satisfied: numpy>=1.17 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from transformers==4.6.1) (1.19.5)\n",
      "Requirement already satisfied: tqdm>=4.27 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from transformers==4.6.1) (4.62.3)\n",
      "Requirement already satisfied: requests in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from transformers==4.6.1) (2.26.0)\n",
      "Collecting huggingface-hub==0.0.8\n",
      "  Downloading huggingface_hub-0.0.8-py3-none-any.whl (34 kB)\n",
      "Requirement already satisfied: dataclasses in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from transformers==4.6.1) (0.8)\n",
      "Requirement already satisfied: packaging in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from transformers==4.6.1) (21.0)\n",
      "Requirement already satisfied: dill in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from datasets[s3]==1.6.2) (0.3.4)\n",
      "Requirement already satisfied: multiprocess in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from datasets[s3]==1.6.2) (0.70.12.2)\n",
      "Collecting tqdm>=4.27\n",
      "  Downloading tqdm-4.49.0-py2.py3-none-any.whl (69 kB)\n",
      "\u001B[K     |████████████████████████████████| 69 kB 13.2 MB/s eta 0:00:01\n",
      "\u001B[?25hCollecting xxhash\n",
      "  Downloading xxhash-2.0.2-cp36-cp36m-manylinux2010_x86_64.whl (243 kB)\n",
      "\u001B[K     |████████████████████████████████| 243 kB 67.6 MB/s eta 0:00:01\n",
      "\u001B[?25hRequirement already satisfied: fsspec in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from datasets[s3]==1.6.2) (2021.4.0)\n",
      "Requirement already satisfied: pandas in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from datasets[s3]==1.6.2) (1.1.5)\n",
      "Requirement already satisfied: pyarrow>=1.0.0<4.0.0 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from datasets[s3]==1.6.2) (5.0.0)\n",
      "Requirement already satisfied: s3fs in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from datasets[s3]==1.6.2) (2021.4.0)\n",
      "Collecting botocore==1.19.52\n",
      "  Downloading botocore-1.19.52-py2.py3-none-any.whl (7.2 MB)\n",
      "\u001B[K     |████████████████████████████████| 7.2 MB 31.0 MB/s eta 0:00:01\n",
      "\u001B[?25hCollecting boto3==1.16.43\n",
      "  Downloading boto3-1.16.43-py2.py3-none-any.whl (130 kB)\n",
      "\u001B[K     |████████████████████████████████| 130 kB 70.6 MB/s eta 0:00:01\n",
      "\u001B[?25hRequirement already satisfied: jmespath<1.0.0,>=0.7.1 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from boto3==1.16.43->datasets[s3]==1.6.2) (0.10.0)\n",
      "Collecting s3transfer<0.4.0,>=0.3.0\n",
      "  Downloading s3transfer-0.3.7-py2.py3-none-any.whl (73 kB)\n",
      "\u001B[K     |████████████████████████████████| 73 kB 583 kB/s  eta 0:00:01\n",
      "\u001B[?25hRequirement already satisfied: urllib3<1.27,>=1.25.4 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from botocore==1.19.52->datasets[s3]==1.6.2) (1.26.7)\n",
      "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from botocore==1.19.52->datasets[s3]==1.6.2) (2.8.2)\n",
      "Requirement already satisfied: attrs in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from sagemaker>=2.48.0) (21.2.0)\n",
      "Requirement already satisfied: google-pasta in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from sagemaker>=2.48.0) (0.2.0)\n",
      "Requirement already satisfied: protobuf>=3.1 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from sagemaker>=2.48.0) (3.19.0)\n",
      "Requirement already satisfied: protobuf3-to-dict>=0.1.5 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from sagemaker>=2.48.0) (0.1.5)\n",
      "Requirement already satisfied: smdebug_rulesconfig==1.0.1 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from sagemaker>=2.48.0) (1.0.1)\n",
      "Requirement already satisfied: pathos in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from sagemaker>=2.48.0) (0.2.8)\n",
      "Requirement already satisfied: typing-extensions>=3.6.4 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from importlib-metadata->transformers==4.6.1) (3.10.0.2)\n",
      "Requirement already satisfied: zipp>=0.5 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from importlib-metadata->transformers==4.6.1) (3.6.0)\n",
      "Requirement already satisfied: pyparsing>=2.0.2 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from packaging->transformers==4.6.1) (3.0.1)\n",
      "Requirement already satisfied: six in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from protobuf3-to-dict>=0.1.5->sagemaker>=2.48.0) (1.16.0)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from requests->transformers==4.6.1) (2021.10.8)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from requests->transformers==4.6.1) (3.3)\n",
      "Requirement already satisfied: charset-normalizer~=2.0.0 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from requests->transformers==4.6.1) (2.0.7)\n",
      "Requirement already satisfied: pytz>=2017.2 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from pandas->datasets[s3]==1.6.2) (2021.3)\n",
      "Requirement already satisfied: pox>=0.3.0 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from pathos->sagemaker>=2.48.0) (0.3.0)\n",
      "Requirement already satisfied: ppft>=1.6.6.4 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from pathos->sagemaker>=2.48.0) (1.6.6.4)\n",
      "Requirement already satisfied: aiobotocore>=1.0.1 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from s3fs->datasets[s3]==1.6.2) (1.3.0)\n",
      "Collecting aiobotocore>=1.0.1\n",
      "  Downloading aiobotocore-2.0.0.tar.gz (52 kB)\n",
      "\u001B[K     |████████████████████████████████| 52 kB 2.8 MB/s  eta 0:00:01\n",
      "\u001B[?25h  Downloading aiobotocore-1.4.2.tar.gz (52 kB)\n",
      "\u001B[K     |████████████████████████████████| 52 kB 1.8 MB/s  eta 0:00:01\n",
      "\u001B[?25h  Downloading aiobotocore-1.4.1.tar.gz (52 kB)\n",
      "\u001B[K     |████████████████████████████████| 52 kB 168 kB/s  eta 0:00:01\n",
      "\u001B[?25h  Downloading aiobotocore-1.4.0.tar.gz (51 kB)\n",
      "\u001B[K     |████████████████████████████████| 51 kB 624 kB/s  eta 0:00:01\n",
      "\u001B[?25h  Downloading aiobotocore-1.3.3.tar.gz (50 kB)\n",
      "\u001B[K     |████████████████████████████████| 50 kB 9.7 MB/s  eta 0:00:01\n",
      "\u001B[?25h  Downloading aiobotocore-1.3.2.tar.gz (49 kB)\n",
      "\u001B[K     |████████████████████████████████| 49 kB 8.8 MB/s  eta 0:00:01\n",
      "\u001B[?25h  Downloading aiobotocore-1.3.1.tar.gz (48 kB)\n",
      "\u001B[K     |████████████████████████████████| 48 kB 1.5 MB/s  eta 0:00:01\n",
      "\u001B[?25h  Downloading aiobotocore-1.2.2.tar.gz (48 kB)\n",
      "\u001B[K     |████████████████████████████████| 48 kB 7.6 MB/s  eta 0:00:01\n",
      "\u001B[?25hRequirement already satisfied: aiohttp>=3.3.1 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from aiobotocore>=1.0.1->s3fs->datasets[s3]==1.6.2) (3.7.4.post0)\n",
      "Requirement already satisfied: wrapt>=1.10.10 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from aiobotocore>=1.0.1->s3fs->datasets[s3]==1.6.2) (1.12.1)\n",
      "Requirement already satisfied: aioitertools>=0.5.1 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from aiobotocore>=1.0.1->s3fs->datasets[s3]==1.6.2) (0.7.1)\n",
      "Requirement already satisfied: chardet<5.0,>=2.0 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from aiohttp>=3.3.1->aiobotocore>=1.0.1->s3fs->datasets[s3]==1.6.2) (3.0.4)\n",
      "Requirement already satisfied: yarl<2.0,>=1.0 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from aiohttp>=3.3.1->aiobotocore>=1.0.1->s3fs->datasets[s3]==1.6.2) (1.6.3)\n",
      "Requirement already satisfied: idna-ssl>=1.0 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from aiohttp>=3.3.1->aiobotocore>=1.0.1->s3fs->datasets[s3]==1.6.2) (1.1.0)\n",
      "Requirement already satisfied: multidict<7.0,>=4.5 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from aiohttp>=3.3.1->aiobotocore>=1.0.1->s3fs->datasets[s3]==1.6.2) (5.1.0)\n",
      "Requirement already satisfied: async-timeout<4.0,>=3.0 in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from aiohttp>=3.3.1->aiobotocore>=1.0.1->s3fs->datasets[s3]==1.6.2) (3.0.1)\n",
      "Requirement already satisfied: click in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from sacremoses->transformers==4.6.1) (7.1.2)\n",
      "Requirement already satisfied: joblib in /home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages (from sacremoses->transformers==4.6.1) (1.0.1)\n",
      "Building wheels for collected packages: sagemaker, aiobotocore\n",
      "  Building wheel for sagemaker (setup.py) ... \u001B[?25ldone\n",
      "\u001B[?25h  Created wheel for sagemaker: filename=sagemaker-2.68.0-py2.py3-none-any.whl size=625601 sha256=24493959faf6abda0c605c1339c8cd9b261abe6ceab32aa863abb92b27d569fe\n",
      "  Stored in directory: /home/ec2-user/.cache/pip/wheels/88/c4/a1/7333eee34871b9fdc7ca51effe2d5146ca654418862e5d41e4\n",
      "  Building wheel for aiobotocore (setup.py) ... \u001B[?25ldone\n",
      "\u001B[?25h  Created wheel for aiobotocore: filename=aiobotocore-1.2.2-py3-none-any.whl size=45750 sha256=dfc217e74e54563249ec6d52066879864695d0d9d5390c2cc694ade302de05f8\n",
      "  Stored in directory: /home/ec2-user/.cache/pip/wheels/37/f3/76/dfc2d32494696a7e4710b2f57d9d15212226d19c42dc395865\n",
      "Successfully built sagemaker aiobotocore\n",
      "Installing collected packages: tqdm, filelock, botocore, xxhash, s3transfer, huggingface-hub, aiobotocore, tokenizers, sacremoses, datasets, boto3, transformers, sagemaker\n",
      "  Attempting uninstall: tqdm\n",
      "    Found existing installation: tqdm 4.62.3\n",
      "    Uninstalling tqdm-4.62.3:\n",
      "      Successfully uninstalled tqdm-4.62.3\n",
      "  Attempting uninstall: botocore\n",
      "    Found existing installation: botocore 1.22.3\n",
      "    Uninstalling botocore-1.22.3:\n",
      "      Successfully uninstalled botocore-1.22.3\n",
      "  Attempting uninstall: s3transfer\n",
      "    Found existing installation: s3transfer 0.5.0\n",
      "    Uninstalling s3transfer-0.5.0:\n",
      "      Successfully uninstalled s3transfer-0.5.0\n",
      "  Attempting uninstall: aiobotocore\n",
      "    Found existing installation: aiobotocore 1.3.0\n",
      "    Uninstalling aiobotocore-1.3.0:\n",
      "      Successfully uninstalled aiobotocore-1.3.0\n",
      "  Attempting uninstall: boto3\n",
      "    Found existing installation: boto3 1.19.3\n",
      "    Uninstalling boto3-1.19.3:\n",
      "      Successfully uninstalled boto3-1.19.3\n",
      "  Attempting uninstall: sagemaker\n",
      "    Found existing installation: sagemaker 2.66.0\n",
      "    Uninstalling sagemaker-2.66.0:\n",
      "      Successfully uninstalled sagemaker-2.66.0\n",
      "\u001B[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
      "awscli 1.21.3 requires botocore==1.22.3, but you have botocore 1.19.52 which is incompatible.\n",
      "awscli 1.21.3 requires s3transfer<0.6.0,>=0.5.0, but you have s3transfer 0.3.7 which is incompatible.\u001B[0m\n",
      "Successfully installed aiobotocore-1.2.2 boto3-1.17.49 botocore-1.20.49 datasets-1.6.2 filelock-3.3.2 huggingface-hub-0.0.8 s3transfer-0.3.7 sacremoses-0.0.46 sagemaker-2.68.0 tokenizers-0.10.3 tqdm-4.49.0 transformers-4.6.1 xxhash-2.0.2\n",
      "\u001B[33mWARNING: You are using pip version 21.2.4; however, version 21.3.1 is available.\n",
      "You should consider upgrading via the '/home/ec2-user/anaconda3/envs/pytorch_latest_p36/bin/python -m pip install --upgrade pip' command.\u001B[0m\n"
     ]
    }
   ],
   "source": [
    "!pip install \"sagemaker>=2.48.0\" \"transformers==4.6.1\" \"datasets[s3]==1.6.2\" --upgrade"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "8ec4b075",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "IAM role arn used for running training: arn:aws:iam::847380964353:role/spot-bot-SpotSageMakerExecutionRole-917OYJPI7O18\n",
      "S3 bucket used for storing artifacts: sagemaker-us-west-2-847380964353\n"
     ]
    }
   ],
   "source": [
    "import sagemaker.huggingface\n",
    "import sagemaker\n",
    "\n",
    "sess = sagemaker.Session()\n",
    "role = sagemaker.get_execution_role()\n",
    "\n",
    "print(f\"IAM role arn used for running training: {role}\")\n",
    "print(f\"S3 bucket used for storing artifacts: {sess.default_bucket()}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3b585957",
   "metadata": {},
   "source": [
    "## train-classification"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "ab0d7886",
   "metadata": {},
   "outputs": [],
   "source": [
    "## prepare data\n",
    "import pandas as pd\n",
    "import os\n",
    "\n",
    "data = pd.read_csv('../all_saved_train.csv', encoding='latin-1')\n",
    "data.columns=[\"label\",\"v2\"]\n",
    "\n",
    "# use csv file to test \n",
    "data[:180000].to_csv('./train.csv',index=False,encoding='utf-8')\n",
    "data[180000:].to_csv('./test.csv',index=False,encoding='utf-8')"
   ]
  },
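  {
   "cell_type": "markdown",
   "id": "f3a9d2c1",
   "metadata": {},
   "source": [
    "Optional sanity check (a minimal sketch, assuming the two CSVs were just written by the cell above): reload `train.csv` and look at its shape and label distribution before uploading anything to S3."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a7c51e90",
   "metadata": {},
   "outputs": [],
   "source": [
    "# reload the training split and confirm it looks as expected\n",
    "train_df = pd.read_csv('./train.csv')\n",
    "print(train_df.shape)                    # should be (180000, 2)\n",
    "print(train_df['label'].value_counts())  # class balance across the 31 labels"
   ]
  },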
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "e12c96c5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import boto3\n",
    "prefix='hp-datalab'\n",
    "\n",
    "bucket = sess.default_bucket() \n",
    "boto3.Session().resource(\"s3\").Bucket(bucket).Object(\n",
    "    os.path.join(prefix, \"train/train.csv\")\n",
    ").upload_file(\"./train.csv\")\n",
    "boto3.Session().resource(\"s3\").Bucket(bucket).Object(\n",
    "    os.path.join(prefix, \"test/test.csv\")\n",
    ").upload_file(\"./test.csv\")"
   ]
  },
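  {
   "cell_type": "markdown",
   "id": "b2d4e6f8",
   "metadata": {},
   "source": [
    "Equivalently, the SageMaker SDK's `Session.upload_data` helper performs the same upload and returns the resulting S3 URIs directly (a sketch using the same `bucket` and `prefix` as above):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c8e0a2b4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# same upload via the SageMaker SDK helper; each call returns the resulting s3:// URI\n",
    "train_uri = sess.upload_data('./train.csv', bucket=bucket, key_prefix=f'{prefix}/train')\n",
    "test_uri = sess.upload_data('./test.csv', bucket=bucket, key_prefix=f'{prefix}/test')\n",
    "print(train_uri, test_uri)"
   ]
  },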
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "8424e810",
   "metadata": {},
   "outputs": [],
   "source": [
    "training_input_path = f's3://{sess.default_bucket()}/{prefix}/train/train.csv'\n",
    "test_input_path = f's3://{sess.default_bucket()}/{prefix}/test/test.csv'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "f7d9cae5",
   "metadata": {},
   "outputs": [],
   "source": [
    "git_config = {'repo': 'https://github.com/huggingface/transformers.git','branch': 'v4.6.1'} # v4.6.1 is referring to the `transformers_version` you use in the estimator."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "5691f544",
   "metadata": {},
   "outputs": [],
   "source": [
    "hyperparameters={'per_device_train_batch_size':4,\n",
    "                 'per_device_eval_batch_size': 4,\n",
    "                 'model_name_or_path': 'roberta-large',\n",
    "                 'train_file':'/opt/ml/input/data/train/train.csv',\n",
    "                 'validation_file':'/opt/ml/input/data/test/test.csv',\n",
    "                 'test_file':'/opt/ml/input/data/test/test.csv',\n",
    "                 'do_train': True,\n",
    "                 'do_predict': True,\n",
    "                 'do_eval': True,\n",
    "                 'save_total_limit':3,\n",
    "                 'num_train_epochs': 3,\n",
    "                 'output_dir': '/opt/ml/model',\n",
    "                 'num_train_epochs': 1,\n",
    "                 'learning_rate': 5e-5,\n",
    "                 'seed': 7,\n",
    "                 'fp16': False,\n",
    "                 'eval_steps': 1000,\n",
    "                 }\n"
   ]
  },
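  {
   "cell_type": "markdown",
   "id": "d1f3a5b7",
   "metadata": {},
   "source": [
    "The estimator forwards this dict to the entry point as `--key value` command-line flags (visible as `SM_USER_ARGS` in the training log further down). The next cell is a small illustration of the argument list `run_glue.py` will receive:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e5b7c9d1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# preview the CLI flags the training toolkit will pass to run_glue.py\n",
    "print(' '.join(f'--{k} {v}' for k, v in hyperparameters.items()))"
   ]
  },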
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "99952578",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sagemaker.huggingface import HuggingFace\n",
    "\n",
    "# create the Estimator\n",
    "huggingface_estimator = HuggingFace(\n",
    "      entry_point='run_glue.py', # script\n",
    "      source_dir='./examples/pytorch/text-classification', # relative path to example\n",
    "      git_config=git_config,\n",
    "      instance_type='ml.p2.8xlarge',\n",
    "      instance_count=1,\n",
    "      volume_size=500,\n",
    "      transformers_version='4.6',\n",
    "      pytorch_version='1.7',\n",
    "      py_version='py36',\n",
    "      role=role,\n",
    "      base_job_name='roberta-large-epoch3',\n",
    "      hyperparameters = hyperparameters\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0fe2cb41",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2021-11-09 06:27:49 Starting - Starting the training job...\n",
      "2021-11-09 06:28:12 Starting - Launching requested ML instancesProfilerReport-1636439263: InProgress\n",
      ".........\n",
      "2021-11-09 06:29:35 Starting - Preparing the instances for training.........\n",
      "2021-11-09 06:31:13 Downloading - Downloading input data\n",
      "2021-11-09 06:31:13 Training - Downloading the training image.................\u001B[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device\u001B[0m\n",
      "\u001B[34mbash: no job control in this shell\u001B[0m\n",
      "\u001B[34m2021-11-09 06:34:03,029 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training\u001B[0m\n",
      "\u001B[34m2021-11-09 06:34:03,108 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.\u001B[0m\n",
      "\u001B[34m2021-11-09 06:34:04,565 sagemaker_pytorch_container.training INFO     Invoking user training script.\u001B[0m\n",
      "\u001B[34m2021-11-09 06:34:05,016 sagemaker-training-toolkit INFO     Installing dependencies from requirements.txt:\u001B[0m\n",
      "\u001B[34m/opt/conda/bin/python3.6 -m pip install -r requirements.txt\u001B[0m\n",
      "\u001B[34mCollecting accelerate\n",
      "  Downloading accelerate-0.5.1-py3-none-any.whl (58 kB)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: datasets>=1.1.3 in /opt/conda/lib/python3.6/site-packages (from -r requirements.txt (line 2)) (1.6.2)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: sentencepiece!=0.1.92 in /opt/conda/lib/python3.6/site-packages (from -r requirements.txt (line 3)) (0.1.91)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: protobuf in /opt/conda/lib/python3.6/site-packages (from -r requirements.txt (line 4)) (3.17.1)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: torch>=1.3 in /opt/conda/lib/python3.6/site-packages (from -r requirements.txt (line 5)) (1.7.1)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: pandas in /opt/conda/lib/python3.6/site-packages (from datasets>=1.1.3->-r requirements.txt (line 2)) (1.1.5)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: importlib-metadata in /opt/conda/lib/python3.6/site-packages (from datasets>=1.1.3->-r requirements.txt (line 2)) (4.0.1)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: requests>=2.19.0 in /opt/conda/lib/python3.6/site-packages (from datasets>=1.1.3->-r requirements.txt (line 2)) (2.25.1)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: fsspec in /opt/conda/lib/python3.6/site-packages (from datasets>=1.1.3->-r requirements.txt (line 2)) (2021.5.0)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: huggingface-hub<0.1.0 in /opt/conda/lib/python3.6/site-packages (from datasets>=1.1.3->-r requirements.txt (line 2)) (0.0.8)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.6/site-packages (from datasets>=1.1.3->-r requirements.txt (line 2)) (1.19.1)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: tqdm<4.50.0,>=4.27 in /opt/conda/lib/python3.6/site-packages (from datasets>=1.1.3->-r requirements.txt (line 2)) (4.49.0)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: pyarrow>=1.0.0<4.0.0 in /opt/conda/lib/python3.6/site-packages (from datasets>=1.1.3->-r requirements.txt (line 2)) (4.0.0)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: xxhash in /opt/conda/lib/python3.6/site-packages (from datasets>=1.1.3->-r requirements.txt (line 2)) (2.0.2)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: dataclasses in /opt/conda/lib/python3.6/site-packages (from datasets>=1.1.3->-r requirements.txt (line 2)) (0.8)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: dill in /opt/conda/lib/python3.6/site-packages (from datasets>=1.1.3->-r requirements.txt (line 2)) (0.3.3)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: multiprocess in /opt/conda/lib/python3.6/site-packages (from datasets>=1.1.3->-r requirements.txt (line 2)) (0.70.11.1)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: packaging in /opt/conda/lib/python3.6/site-packages (from datasets>=1.1.3->-r requirements.txt (line 2)) (20.9)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: typing-extensions in /opt/conda/lib/python3.6/site-packages (from torch>=1.3->-r requirements.txt (line 5)) (3.10.0.0)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: filelock in /opt/conda/lib/python3.6/site-packages (from huggingface-hub<0.1.0->datasets>=1.1.3->-r requirements.txt (line 2)) (3.0.12)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: chardet<5,>=3.0.2 in /opt/conda/lib/python3.6/site-packages (from requests>=2.19.0->datasets>=1.1.3->-r requirements.txt (line 2)) (3.0.4)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.6/site-packages (from requests>=2.19.0->datasets>=1.1.3->-r requirements.txt (line 2)) (2020.12.5)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.6/site-packages (from requests>=2.19.0->datasets>=1.1.3->-r requirements.txt (line 2)) (1.25.11)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.6/site-packages (from requests>=2.19.0->datasets>=1.1.3->-r requirements.txt (line 2)) (2.10)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: pyyaml in /opt/conda/lib/python3.6/site-packages (from accelerate->-r requirements.txt (line 1)) (5.4.1)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: six>=1.9 in /opt/conda/lib/python3.6/site-packages (from protobuf->-r requirements.txt (line 4)) (1.16.0)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.6/site-packages (from importlib-metadata->datasets>=1.1.3->-r requirements.txt (line 2)) (3.4.1)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: pyparsing>=2.0.2 in /opt/conda/lib/python3.6/site-packages (from packaging->datasets>=1.1.3->-r requirements.txt (line 2)) (2.4.7)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: python-dateutil>=2.7.3 in /opt/conda/lib/python3.6/site-packages (from pandas->datasets>=1.1.3->-r requirements.txt (line 2)) (2.8.1)\u001B[0m\n",
      "\u001B[34mRequirement already satisfied: pytz>=2017.2 in /opt/conda/lib/python3.6/site-packages (from pandas->datasets>=1.1.3->-r requirements.txt (line 2)) (2021.1)\u001B[0m\n",
      "\u001B[34mInstalling collected packages: accelerate\u001B[0m\n",
      "\u001B[34mSuccessfully installed accelerate-0.5.1\u001B[0m\n",
      "\u001B[34mWARNING: Running pip as root will break packages and permissions. You should install packages reliably by using venv: https://pip.pypa.io/warnings/venv\u001B[0m\n",
      "\u001B[34m2021-11-09 06:34:08,150 sagemaker-training-toolkit INFO     Invoking user script\u001B[0m\n",
      "\u001B[34mTraining Env:\u001B[0m\n",
      "\u001B[34m{\n",
      "    \"additional_framework_parameters\": {},\n",
      "    \"channel_input_dirs\": {\n",
      "        \"test\": \"/opt/ml/input/data/test\",\n",
      "        \"train\": \"/opt/ml/input/data/train\"\n",
      "    },\n",
      "    \"current_host\": \"algo-1\",\n",
      "    \"framework_module\": \"sagemaker_pytorch_container.training:main\",\n",
      "    \"hosts\": [\n",
      "        \"algo-1\"\n",
      "    ],\n",
      "    \"hyperparameters\": {\n",
      "        \"per_device_eval_batch_size\": 4,\n",
      "        \"seed\": 7,\n",
      "        \"validation_file\": \"/opt/ml/input/data/test/test.csv\",\n",
      "        \"do_predict\": true,\n",
      "        \"do_train\": true,\n",
      "        \"test_file\": \"/opt/ml/input/data/test/test.csv\",\n",
      "        \"save_total_limit\": 3,\n",
      "        \"num_train_epochs\": 1,\n",
      "        \"do_eval\": true,\n",
      "        \"train_file\": \"/opt/ml/input/data/train/train.csv\",\n",
      "        \"output_dir\": \"/opt/ml/model\",\n",
      "        \"eval_steps\": 1000,\n",
      "        \"per_device_train_batch_size\": 4,\n",
      "        \"learning_rate\": 5e-05,\n",
      "        \"model_name_or_path\": \"roberta-large\",\n",
      "        \"fp16\": false\n",
      "    },\n",
      "    \"input_config_dir\": \"/opt/ml/input/config\",\n",
      "    \"input_data_config\": {\n",
      "        \"test\": {\n",
      "            \"TrainingInputMode\": \"File\",\n",
      "            \"S3DistributionType\": \"FullyReplicated\",\n",
      "            \"RecordWrapperType\": \"None\"\n",
      "        },\n",
      "        \"train\": {\n",
      "            \"TrainingInputMode\": \"File\",\n",
      "            \"S3DistributionType\": \"FullyReplicated\",\n",
      "            \"RecordWrapperType\": \"None\"\n",
      "        }\n",
      "    },\n",
      "    \"input_dir\": \"/opt/ml/input\",\n",
      "    \"is_master\": true,\n",
      "    \"job_name\": \"roberta-large-epoch3-2021-11-09-06-27-43-316\",\n",
      "    \"log_level\": 20,\n",
      "    \"master_hostname\": \"algo-1\",\n",
      "    \"model_dir\": \"/opt/ml/model\",\n",
      "    \"module_dir\": \"s3://sagemaker-us-west-2-847380964353/roberta-large-epoch3-2021-11-09-06-27-43-316/source/sourcedir.tar.gz\",\n",
      "    \"module_name\": \"run_glue\",\n",
      "    \"network_interface_name\": \"eth0\",\n",
      "    \"num_cpus\": 32,\n",
      "    \"num_gpus\": 8,\n",
      "    \"output_data_dir\": \"/opt/ml/output/data\",\n",
      "    \"output_dir\": \"/opt/ml/output\",\n",
      "    \"output_intermediate_dir\": \"/opt/ml/output/intermediate\",\n",
      "    \"resource_config\": {\n",
      "        \"current_host\": \"algo-1\",\n",
      "        \"hosts\": [\n",
      "            \"algo-1\"\n",
      "        ],\n",
      "        \"network_interface_name\": \"eth0\"\n",
      "    },\n",
      "    \"user_entry_point\": \"run_glue.py\"\u001B[0m\n",
      "\u001B[34m}\u001B[0m\n",
      "\u001B[34mEnvironment variables:\u001B[0m\n",
      "\u001B[34mSM_HOSTS=[\"algo-1\"]\u001B[0m\n",
      "\u001B[34mSM_NETWORK_INTERFACE_NAME=eth0\u001B[0m\n",
      "\u001B[34mSM_HPS={\"do_eval\":true,\"do_predict\":true,\"do_train\":true,\"eval_steps\":1000,\"fp16\":false,\"learning_rate\":5e-05,\"model_name_or_path\":\"roberta-large\",\"num_train_epochs\":1,\"output_dir\":\"/opt/ml/model\",\"per_device_eval_batch_size\":4,\"per_device_train_batch_size\":4,\"save_total_limit\":3,\"seed\":7,\"test_file\":\"/opt/ml/input/data/test/test.csv\",\"train_file\":\"/opt/ml/input/data/train/train.csv\",\"validation_file\":\"/opt/ml/input/data/test/test.csv\"}\u001B[0m\n",
      "\u001B[34mSM_USER_ENTRY_POINT=run_glue.py\u001B[0m\n",
      "\u001B[34mSM_FRAMEWORK_PARAMS={}\u001B[0m\n",
      "\u001B[34mSM_RESOURCE_CONFIG={\"current_host\":\"algo-1\",\"hosts\":[\"algo-1\"],\"network_interface_name\":\"eth0\"}\u001B[0m\n",
      "\u001B[34mSM_INPUT_DATA_CONFIG={\"test\":{\"RecordWrapperType\":\"None\",\"S3DistributionType\":\"FullyReplicated\",\"TrainingInputMode\":\"File\"},\"train\":{\"RecordWrapperType\":\"None\",\"S3DistributionType\":\"FullyReplicated\",\"TrainingInputMode\":\"File\"}}\u001B[0m\n",
      "\u001B[34mSM_OUTPUT_DATA_DIR=/opt/ml/output/data\u001B[0m\n",
      "\u001B[34mSM_CHANNELS=[\"test\",\"train\"]\u001B[0m\n",
      "\u001B[34mSM_CURRENT_HOST=algo-1\u001B[0m\n",
      "\u001B[34mSM_MODULE_NAME=run_glue\u001B[0m\n",
      "\u001B[34mSM_LOG_LEVEL=20\u001B[0m\n",
      "\u001B[34mSM_FRAMEWORK_MODULE=sagemaker_pytorch_container.training:main\u001B[0m\n",
      "\u001B[34mSM_INPUT_DIR=/opt/ml/input\u001B[0m\n",
      "\u001B[34mSM_INPUT_CONFIG_DIR=/opt/ml/input/config\u001B[0m\n",
      "\u001B[34mSM_OUTPUT_DIR=/opt/ml/output\u001B[0m\n",
      "\u001B[34mSM_NUM_CPUS=32\u001B[0m\n",
      "\u001B[34mSM_NUM_GPUS=8\u001B[0m\n",
      "\u001B[34mSM_MODEL_DIR=/opt/ml/model\u001B[0m\n",
      "\u001B[34mSM_MODULE_DIR=s3://sagemaker-us-west-2-847380964353/roberta-large-epoch3-2021-11-09-06-27-43-316/source/sourcedir.tar.gz\u001B[0m\n",
      "\u001B[34mSM_TRAINING_ENV={\"additional_framework_parameters\":{},\"channel_input_dirs\":{\"test\":\"/opt/ml/input/data/test\",\"train\":\"/opt/ml/input/data/train\"},\"current_host\":\"algo-1\",\"framework_module\":\"sagemaker_pytorch_container.training:main\",\"hosts\":[\"algo-1\"],\"hyperparameters\":{\"do_eval\":true,\"do_predict\":true,\"do_train\":true,\"eval_steps\":1000,\"fp16\":false,\"learning_rate\":5e-05,\"model_name_or_path\":\"roberta-large\",\"num_train_epochs\":1,\"output_dir\":\"/opt/ml/model\",\"per_device_eval_batch_size\":4,\"per_device_train_batch_size\":4,\"save_total_limit\":3,\"seed\":7,\"test_file\":\"/opt/ml/input/data/test/test.csv\",\"train_file\":\"/opt/ml/input/data/train/train.csv\",\"validation_file\":\"/opt/ml/input/data/test/test.csv\"},\"input_config_dir\":\"/opt/ml/input/config\",\"input_data_config\":{\"test\":{\"RecordWrapperType\":\"None\",\"S3DistributionType\":\"FullyReplicated\",\"TrainingInputMode\":\"File\"},\"train\":{\"RecordWrapperType\":\"None\",\"S3DistributionType\":\"FullyReplicated\",\"TrainingInputMode\":\"File\"}},\"input_dir\":\"/opt/ml/input\",\"is_master\":true,\"job_name\":\"roberta-large-epoch3-2021-11-09-06-27-43-316\",\"log_level\":20,\"master_hostname\":\"algo-1\",\"model_dir\":\"/opt/ml/model\",\"module_dir\":\"s3://sagemaker-us-west-2-847380964353/roberta-large-epoch3-2021-11-09-06-27-43-316/source/sourcedir.tar.gz\",\"module_name\":\"run_glue\",\"network_interface_name\":\"eth0\",\"num_cpus\":32,\"num_gpus\":8,\"output_data_dir\":\"/opt/ml/output/data\",\"output_dir\":\"/opt/ml/output\",\"output_intermediate_dir\":\"/opt/ml/output/intermediate\",\"resource_config\":{\"current_host\":\"algo-1\",\"hosts\":[\"algo-1\"],\"network_interface_name\":\"eth0\"},\"user_entry_point\":\"run_glue.py\"}\u001B[0m\n",
      "\u001B[34mSM_USER_ARGS=[\"--do_eval\",\"True\",\"--do_predict\",\"True\",\"--do_train\",\"True\",\"--eval_steps\",\"1000\",\"--fp16\",\"False\",\"--learning_rate\",\"5e-05\",\"--model_name_or_path\",\"roberta-large\",\"--num_train_epochs\",\"1\",\"--output_dir\",\"/opt/ml/model\",\"--per_device_eval_batch_size\",\"4\",\"--per_device_train_batch_size\",\"4\",\"--save_total_limit\",\"3\",\"--seed\",\"7\",\"--test_file\",\"/opt/ml/input/data/test/test.csv\",\"--train_file\",\"/opt/ml/input/data/train/train.csv\",\"--validation_file\",\"/opt/ml/input/data/test/test.csv\"]\u001B[0m\n",
      "\u001B[34mSM_OUTPUT_INTERMEDIATE_DIR=/opt/ml/output/intermediate\u001B[0m\n",
      "\u001B[34mSM_CHANNEL_TEST=/opt/ml/input/data/test\u001B[0m\n",
      "\u001B[34mSM_CHANNEL_TRAIN=/opt/ml/input/data/train\u001B[0m\n",
      "\u001B[34mSM_HP_PER_DEVICE_EVAL_BATCH_SIZE=4\u001B[0m\n",
      "\u001B[34mSM_HP_SEED=7\u001B[0m\n",
      "\u001B[34mSM_HP_VALIDATION_FILE=/opt/ml/input/data/test/test.csv\u001B[0m\n",
      "\u001B[34mSM_HP_DO_PREDICT=true\u001B[0m\n",
      "\u001B[34mSM_HP_DO_TRAIN=true\u001B[0m\n",
      "\u001B[34mSM_HP_TEST_FILE=/opt/ml/input/data/test/test.csv\u001B[0m\n",
      "\u001B[34mSM_HP_SAVE_TOTAL_LIMIT=3\u001B[0m\n",
      "\u001B[34mSM_HP_NUM_TRAIN_EPOCHS=1\u001B[0m\n",
      "\u001B[34mSM_HP_DO_EVAL=true\u001B[0m\n",
      "\u001B[34mSM_HP_TRAIN_FILE=/opt/ml/input/data/train/train.csv\u001B[0m\n",
      "\u001B[34mSM_HP_OUTPUT_DIR=/opt/ml/model\u001B[0m\n",
      "\u001B[34mSM_HP_EVAL_STEPS=1000\u001B[0m\n",
      "\u001B[34mSM_HP_PER_DEVICE_TRAIN_BATCH_SIZE=4\u001B[0m\n",
      "\u001B[34mSM_HP_LEARNING_RATE=5e-05\u001B[0m\n",
      "\u001B[34mSM_HP_MODEL_NAME_OR_PATH=roberta-large\u001B[0m\n",
      "\u001B[34mSM_HP_FP16=false\u001B[0m\n",
      "\u001B[34mPYTHONPATH=/opt/ml/code:/opt/conda/bin:/opt/conda/lib/python36.zip:/opt/conda/lib/python3.6:/opt/conda/lib/python3.6/lib-dynload:/opt/conda/lib/python3.6/site-packages\u001B[0m\n",
      "\u001B[34mInvoking script with the following command:\u001B[0m\n",
      "\u001B[34m/opt/conda/bin/python3.6 run_glue.py --do_eval True --do_predict True --do_train True --eval_steps 1000 --fp16 False --learning_rate 5e-05 --model_name_or_path roberta-large --num_train_epochs 1 --output_dir /opt/ml/model --per_device_eval_batch_size 4 --per_device_train_batch_size 4 --save_total_limit 3 --seed 7 --test_file /opt/ml/input/data/test/test.csv --train_file /opt/ml/input/data/train/train.csv --validation_file /opt/ml/input/data/test/test.csv\u001B[0m\n",
      "\n",
      "2021-11-09 06:34:14 Training - Training image download completed. Training in progress.\u001B[34m11/09/2021 06:34:13 - WARNING - __main__ -   Process rank: -1, device: cuda:0, n_gpu: 8distributed training: False, 16-bits training: False\u001B[0m\n",
      "\u001B[34m11/09/2021 06:34:13 - INFO - __main__ -   Training/evaluation parameters TrainingArguments(output_dir=/opt/ml/model, overwrite_output_dir=False, do_train=True, do_eval=True, do_predict=True, evaluation_strategy=IntervalStrategy.NO, prediction_loss_only=False, per_device_train_batch_size=4, per_device_eval_batch_size=4, gradient_accumulation_steps=1, eval_accumulation_steps=None, learning_rate=5e-05, weight_decay=0.0, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=1.0, max_steps=-1, lr_scheduler_type=SchedulerType.LINEAR, warmup_ratio=0.0, warmup_steps=0, logging_dir=runs/Nov09_06-34-12_algo-1, logging_strategy=IntervalStrategy.STEPS, logging_first_step=False, logging_steps=500, save_strategy=IntervalStrategy.STEPS, save_steps=500, save_total_limit=3, no_cuda=False, seed=7, fp16=False, fp16_opt_level=O1, fp16_backend=auto, fp16_full_eval=False, local_rank=-1, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=1000, dataloader_num_workers=0, past_index=-1, run_name=/opt/ml/model, disable_tqdm=False, remove_unused_columns=True, label_names=None, load_best_model_at_end=False, metric_for_best_model=None, greater_is_better=None, ignore_data_skip=False, sharded_ddp=[], deepspeed=None, label_smoothing_factor=0.0, adafactor=False, group_by_length=False, length_column_name=length, report_to=[], ddp_find_unused_parameters=None, dataloader_pin_memory=True, skip_memory_metrics=False, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, _n_gpu=8, mp_parameters=)\u001B[0m\n",
      "\u001B[34m11/09/2021 06:34:13 - INFO - __main__ -   load a local file for train: /opt/ml/input/data/train/train.csv\u001B[0m\n",
      "\u001B[34m11/09/2021 06:34:13 - INFO - __main__ -   load a local file for validation: /opt/ml/input/data/test/test.csv\u001B[0m\n",
      "\u001B[34m11/09/2021 06:34:13 - INFO - __main__ -   load a local file for test: /opt/ml/input/data/test/test.csv\u001B[0m\n",
      "\u001B[34m11/09/2021 06:34:13 - WARNING - datasets.builder -   Using custom data configuration default-4eeefc263adf7f8a\u001B[0m\n",
      "\u001B[34mDownloading and preparing dataset csv/default (download: Unknown size, generated: Unknown size, post-processed: Unknown size, total: Unknown size) to /root/.cache/huggingface/datasets/csv/default-4eeefc263adf7f8a/0.0.0/2dc6629a9ff6b5697d82c25b73731dd440507a69cbce8b425db50b751e8fcfd0...\u001B[0m\n",
      "\u001B[34mDataset csv downloaded and prepared to /root/.cache/huggingface/datasets/csv/default-4eeefc263adf7f8a/0.0.0/2dc6629a9ff6b5697d82c25b73731dd440507a69cbce8b425db50b751e8fcfd0. Subsequent calls will reuse this data.\u001B[0m\n",
      "\u001B[34m[INFO|file_utils.py:1532] 2021-11-09 06:34:14,826 >> https://huggingface.co/roberta-large/resolve/main/config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpgsv_p5n2\u001B[0m\n",
      "\u001B[34m[INFO|file_utils.py:1536] 2021-11-09 06:34:15,118 >> storing https://huggingface.co/roberta-large/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/dea67b44b38d504f2523f3ddb6acb601b23d67bee52c942da336fa1283100990.94cae8b3a8dbab1d59b9d4827f7ce79e73124efa6bb970412cd503383a95f373\u001B[0m\n",
      "\u001B[34m[INFO|file_utils.py:1544] 2021-11-09 06:34:15,118 >> creating metadata file for /root/.cache/huggingface/transformers/dea67b44b38d504f2523f3ddb6acb601b23d67bee52c942da336fa1283100990.94cae8b3a8dbab1d59b9d4827f7ce79e73124efa6bb970412cd503383a95f373\u001B[0m\n",
      "\u001B[34m[INFO|configuration_utils.py:517] 2021-11-09 06:34:15,119 >> loading configuration file https://huggingface.co/roberta-large/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/dea67b44b38d504f2523f3ddb6acb601b23d67bee52c942da336fa1283100990.94cae8b3a8dbab1d59b9d4827f7ce79e73124efa6bb970412cd503383a95f373\u001B[0m\n",
      "\u001B[34m[INFO|configuration_utils.py:553] 2021-11-09 06:34:15,120 >> Model config RobertaConfig {\n",
      "  \"architectures\": [\n",
      "    \"RobertaForMaskedLM\"\n",
      "  ],\n",
      "  \"attention_probs_dropout_prob\": 0.1,\n",
      "  \"bos_token_id\": 0,\n",
      "  \"eos_token_id\": 2,\n",
      "  \"gradient_checkpointing\": false,\n",
      "  \"hidden_act\": \"gelu\",\n",
      "  \"hidden_dropout_prob\": 0.1,\n",
      "  \"hidden_size\": 1024,\n",
      "  \"id2label\": {\n",
      "    \"0\": \"LABEL_0\",\n",
      "    \"1\": \"LABEL_1\",\n",
      "    \"2\": \"LABEL_2\",\n",
      "    \"3\": \"LABEL_3\",\n",
      "    \"4\": \"LABEL_4\",\n",
      "    \"5\": \"LABEL_5\",\n",
      "    \"6\": \"LABEL_6\",\n",
      "    \"7\": \"LABEL_7\",\n",
      "    \"8\": \"LABEL_8\",\n",
      "    \"9\": \"LABEL_9\",\n",
      "    \"10\": \"LABEL_10\",\n",
      "    \"11\": \"LABEL_11\",\n",
      "    \"12\": \"LABEL_12\",\n",
      "    \"13\": \"LABEL_13\",\n",
      "    \"14\": \"LABEL_14\",\n",
      "    \"15\": \"LABEL_15\",\n",
      "    \"16\": \"LABEL_16\",\n",
      "    \"17\": \"LABEL_17\",\n",
      "    \"18\": \"LABEL_18\",\n",
      "    \"19\": \"LABEL_19\",\n",
      "    \"20\": \"LABEL_20\",\n",
      "    \"21\": \"LABEL_21\",\n",
      "    \"22\": \"LABEL_22\",\n",
      "    \"23\": \"LABEL_23\",\n",
      "    \"24\": \"LABEL_24\",\n",
      "    \"25\": \"LABEL_25\",\n",
      "    \"26\": \"LABEL_26\",\n",
      "    \"27\": \"LABEL_27\",\n",
      "    \"28\": \"LABEL_28\",\n",
      "    \"29\": \"LABEL_29\",\n",
      "    \"30\": \"LABEL_30\"\n",
      "  },\n",
      "  \"initializer_range\": 0.02,\n",
      "  \"intermediate_size\": 4096,\n",
      "  \"label2id\": {\n",
      "    \"LABEL_0\": 0,\n",
      "    \"LABEL_1\": 1,\n",
      "    \"LABEL_10\": 10,\n",
      "    \"LABEL_11\": 11,\n",
      "    \"LABEL_12\": 12,\n",
      "    \"LABEL_13\": 13,\n",
      "    \"LABEL_14\": 14,\n",
      "    \"LABEL_15\": 15,\n",
      "    \"LABEL_16\": 16,\n",
      "    \"LABEL_17\": 17,\n",
      "    \"LABEL_18\": 18,\n",
      "    \"LABEL_19\": 19,\n",
      "    \"LABEL_2\": 2,\n",
      "    \"LABEL_20\": 20,\n",
      "    \"LABEL_21\": 21,\n",
      "    \"LABEL_22\": 22,\n",
      "    \"LABEL_23\": 23,\n",
      "    \"LABEL_24\": 24,\n",
      "    \"LABEL_25\": 25,\n",
      "    \"LABEL_26\": 26,\n",
      "    \"LABEL_27\": 27,\n",
      "    \"LABEL_28\": 28,\n",
      "    \"LABEL_29\": 29,\n",
      "    \"LABEL_3\": 3,\n",
      "    \"LABEL_30\": 30,\n",
      "    \"LABEL_4\": 4,\n",
      "    \"LABEL_5\": 5,\n",
      "    \"LABEL_6\": 6,\n",
      "    \"LABEL_7\": 7,\n",
      "    \"LABEL_8\": 8,\n",
      "    \"LABEL_9\": 9\n",
      "  },\n",
      "  \"layer_norm_eps\": 1e-05,\n",
      "  \"max_position_embeddings\": 514,\n",
      "  \"model_type\": \"roberta\",\n",
      "  \"num_attention_heads\": 16,\n",
      "  \"num_hidden_layers\": 24,\n",
      "  \"pad_token_id\": 1,\n",
      "  \"position_embedding_type\": \"absolute\",\n",
      "  \"transformers_version\": \"4.6.1\",\n",
      "  \"type_vocab_size\": 1,\n",
      "  \"use_cache\": true,\n",
      "  \"vocab_size\": 50265\u001B[0m\n",
      "\u001B[34m}\u001B[0m\n",
      "\u001B[34m[INFO|configuration_utils.py:517] 2021-11-09 06:34:15,403 >> loading configuration file https://huggingface.co/roberta-large/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/dea67b44b38d504f2523f3ddb6acb601b23d67bee52c942da336fa1283100990.94cae8b3a8dbab1d59b9d4827f7ce79e73124efa6bb970412cd503383a95f373\u001B[0m\n",
      "\u001B[34m[INFO|configuration_utils.py:553] 2021-11-09 06:34:15,404 >> Model config RobertaConfig {\n",
      "  \"architectures\": [\n",
      "    \"RobertaForMaskedLM\"\n",
      "  ],\n",
      "  \"attention_probs_dropout_prob\": 0.1,\n",
      "  \"bos_token_id\": 0,\n",
      "  \"eos_token_id\": 2,\n",
      "  \"gradient_checkpointing\": false,\n",
      "  \"hidden_act\": \"gelu\",\n",
      "  \"hidden_dropout_prob\": 0.1,\n",
      "  \"hidden_size\": 1024,\n",
      "  \"initializer_range\": 0.02,\n",
      "  \"intermediate_size\": 4096,\n",
      "  \"layer_norm_eps\": 1e-05,\n",
      "  \"max_position_embeddings\": 514,\n",
      "  \"model_type\": \"roberta\",\n",
      "  \"num_attention_heads\": 16,\n",
      "  \"num_hidden_layers\": 24,\n",
      "  \"pad_token_id\": 1,\n",
      "  \"position_embedding_type\": \"absolute\",\n",
      "  \"transformers_version\": \"4.6.1\",\n",
      "  \"type_vocab_size\": 1,\n",
      "  \"use_cache\": true,\n",
      "  \"vocab_size\": 50265\u001B[0m\n",
      "\u001B[34m}\u001B[0m\n",
      "\u001B[34m[INFO|file_utils.py:1532] 2021-11-09 06:34:15,664 >> https://huggingface.co/roberta-large/resolve/main/vocab.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpcaqjtmsz\u001B[0m\n",
      "\u001B[34m[INFO|file_utils.py:1536] 2021-11-09 06:34:16,818 >> storing https://huggingface.co/roberta-large/resolve/main/vocab.json in cache at /root/.cache/huggingface/transformers/7c1ba2435b05451bc3b4da073c8dec9630b22024a65f6c41053caccf2880eb8f.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab\u001B[0m\n",
      "\u001B[34m[INFO|file_utils.py:1544] 2021-11-09 06:34:16,819 >> creating metadata file for /root/.cache/huggingface/transformers/7c1ba2435b05451bc3b4da073c8dec9630b22024a65f6c41053caccf2880eb8f.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab\u001B[0m\n",
      "\u001B[34m[INFO|file_utils.py:1532] 2021-11-09 06:34:17,129 >> https://huggingface.co/roberta-large/resolve/main/merges.txt not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpmlbdslkb\u001B[0m\n",
      "\u001B[34m[INFO|file_utils.py:1536] 2021-11-09 06:34:17,944 >> storing https://huggingface.co/roberta-large/resolve/main/merges.txt in cache at /root/.cache/huggingface/transformers/20b5a00a80e27ae9accbe25672aba42ad2d4d4cb2c4b9359b50ca8e34e107d6d.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b\u001B[0m\n",
      "\u001B[34m[INFO|file_utils.py:1544] 2021-11-09 06:34:17,945 >> creating metadata file for /root/.cache/huggingface/transformers/20b5a00a80e27ae9accbe25672aba42ad2d4d4cb2c4b9359b50ca8e34e107d6d.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b\u001B[0m\n",
      "\u001B[34m[INFO|file_utils.py:1532] 2021-11-09 06:34:18,236 >> https://huggingface.co/roberta-large/resolve/main/tokenizer.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpm9c80zdj\u001B[0m\n",
      "\u001B[34m[INFO|file_utils.py:1536] 2021-11-09 06:34:19,583 >> storing https://huggingface.co/roberta-large/resolve/main/tokenizer.json in cache at /root/.cache/huggingface/transformers/e16a2590deb9e6d73711d6e05bf27d832fa8c1162d807222e043ca650a556964.fc9576039592f026ad76a1c231b89aee8668488c671dfbe6616bab2ed298d730\u001B[0m\n",
      "\u001B[34m[INFO|file_utils.py:1544] 2021-11-09 06:34:19,583 >> creating metadata file for /root/.cache/huggingface/transformers/e16a2590deb9e6d73711d6e05bf27d832fa8c1162d807222e043ca650a556964.fc9576039592f026ad76a1c231b89aee8668488c671dfbe6616bab2ed298d730\u001B[0m\n",
      "\u001B[34m[INFO|tokenization_utils_base.py:1717] 2021-11-09 06:34:20,454 >> loading file https://huggingface.co/roberta-large/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/7c1ba2435b05451bc3b4da073c8dec9630b22024a65f6c41053caccf2880eb8f.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab\u001B[0m\n",
      "\u001B[34m[INFO|tokenization_utils_base.py:1717] 2021-11-09 06:34:20,454 >> loading file https://huggingface.co/roberta-large/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/20b5a00a80e27ae9accbe25672aba42ad2d4d4cb2c4b9359b50ca8e34e107d6d.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b\u001B[0m\n",
      "\u001B[34m[INFO|tokenization_utils_base.py:1717] 2021-11-09 06:34:20,455 >> loading file https://huggingface.co/roberta-large/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/e16a2590deb9e6d73711d6e05bf27d832fa8c1162d807222e043ca650a556964.fc9576039592f026ad76a1c231b89aee8668488c671dfbe6616bab2ed298d730\u001B[0m\n",
      "\u001B[34m[INFO|tokenization_utils_base.py:1717] 2021-11-09 06:34:20,455 >> loading file https://huggingface.co/roberta-large/resolve/main/added_tokens.json from cache at None\u001B[0m\n",
      "\u001B[34m[INFO|tokenization_utils_base.py:1717] 2021-11-09 06:34:20,455 >> loading file https://huggingface.co/roberta-large/resolve/main/special_tokens_map.json from cache at None\u001B[0m\n",
      "\u001B[34m[INFO|tokenization_utils_base.py:1717] 2021-11-09 06:34:20,455 >> loading file https://huggingface.co/roberta-large/resolve/main/tokenizer_config.json from cache at None\u001B[0m\n",
      "\u001B[34m[INFO|file_utils.py:1532] 2021-11-09 06:34:20,809 >> https://huggingface.co/roberta-large/resolve/main/pytorch_model.bin not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmptk3alu77\u001B[0m\n",
      "\u001B[34m[INFO|file_utils.py:1536] 2021-11-09 06:34:46,358 >> storing https://huggingface.co/roberta-large/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/8e36ec2f5052bec1e79e139b84c2c3089cb647694ba0f4f634fec7b8258f7c89.c43841d8c5cd23c435408295164cda9525270aa42cd0cc9200911570c0342352\u001B[0m\n",
      "\u001B[34m[INFO|file_utils.py:1544] 2021-11-09 06:34:46,358 >> creating metadata file for /root/.cache/huggingface/transformers/8e36ec2f5052bec1e79e139b84c2c3089cb647694ba0f4f634fec7b8258f7c89.c43841d8c5cd23c435408295164cda9525270aa42cd0cc9200911570c0342352\u001B[0m\n",
      "\u001B[34m[INFO|modeling_utils.py:1155] 2021-11-09 06:34:46,359 >> loading weights file https://huggingface.co/roberta-large/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/8e36ec2f5052bec1e79e139b84c2c3089cb647694ba0f4f634fec7b8258f7c89.c43841d8c5cd23c435408295164cda9525270aa42cd0cc9200911570c0342352\u001B[0m\n",
      "\u001B[34m[WARNING|modeling_utils.py:1331] 2021-11-09 06:34:50,851 >> Some weights of the model checkpoint at roberta-large were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.bias']\u001B[0m\n",
      "\u001B[34m- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\u001B[0m\n",
      "\u001B[34m- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\u001B[0m\n",
      "\u001B[34m[WARNING|modeling_utils.py:1342] 2021-11-09 06:34:50,851 >> Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.weight']\u001B[0m\n",
      "\u001B[34mYou should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\u001B[0m\n",
      "\u001B[34m11/09/2021 06:35:01 - INFO - __main__ -   Sample 84890 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'input_ids': [0, 510, 4950, 26665, 1636, 7584, 27535, 83, 264, 254, 344, 11768, 6439, 497, 20, 4414, 4, 25449, 99, 47, 802, 47, 1467, 59, 21592, 2496, 6, 142, 1636, 18, 2992, 5, 177, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'label': 7, 'v2': \"Pregnant Kim Kardashian Rocks A Sheer Jumpsuit At The Airport. Forget what you thought you knew about maternity style, because Kim's changing the game.\"}.\u001B[0m\n",
      "\u001B[34m11/09/2021 06:35:01 - INFO - __main__ -   Sample 39544 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'input_ids': [0, 6179, 38, 2548, 3917, 12830, 46730, 219, 4, 22, 100, 33, 554, 878, 7, 5494, 9946, 5, 169, 127, 2900, 3681, 127, 809, 6, 45, 25, 10, 1363, 7626, 53, 25, 10, 3944, 7, 120, 383, 626, 72, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'label': 29, 'v2': 'How I Outran Misogyny. \"I have started running to retrain the way my brain sees my body, not as a sexual object but as a tool to get things done.\"'}.\u001B[0m\n",
      "\u001B[34m11/09/2021 06:35:01 - INFO - __main__ -   Sample 103500 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'input_ids': [0, 250, 28003, 1740, 5, 6633, 9, 730, 4, 38, 3392, 47, 13, 5, 27255, 1054, 522, 641, 6, 8, 5, 6633, 9, 730, 2010, 3177, 6, 13, 49, 10640, 7, 489, 383, 6327, 8, 7053, 4, 1437, 392, 49, 9187, 535, 7, 12224, 11, 5, 1202, 377, 789, 6, 25, 52, 173, 1567, 18029, 464, 77, 249, 32, 8943, 7, 5, 276, 2074, 25, 97, 2286, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'label': 20, 'v2': 'A Prayer From the Mall of America. I thank you for the Bloomington Police Department, and the Mall of America Security Force, for their willingness to keep things calm and peaceful.  May their humanity continue to shine in the difficult months ahead, as we work towards systemic change when police are accountable to the same laws as other citizens.'}.\u001B[0m\n",
      "\u001B[34m[INFO|trainer.py:516] 2021-11-09 06:35:09,215 >> The following columns in the training set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: v2.\u001B[0m\n",
      "\u001B[34m[INFO|trainer.py:1156] 2021-11-09 06:35:09,241 >> ***** Running training *****\u001B[0m\n",
      "\u001B[34m[INFO|trainer.py:1157] 2021-11-09 06:35:09,241 >>   Num examples = 180000\u001B[0m\n",
      "\u001B[34m[INFO|trainer.py:1158] 2021-11-09 06:35:09,241 >>   Num Epochs = 1\u001B[0m\n",
      "\u001B[34m[INFO|trainer.py:1159] 2021-11-09 06:35:09,241 >>   Instantaneous batch size per device = 4\u001B[0m\n",
      "\u001B[34m[INFO|trainer.py:1160] 2021-11-09 06:35:09,242 >>   Total train batch size (w. parallel, distributed & accumulation) = 32\u001B[0m\n",
      "\u001B[34m[INFO|trainer.py:1161] 2021-11-09 06:35:09,242 >>   Gradient Accumulation steps = 1\u001B[0m\n",
      "\u001B[34m[INFO|trainer.py:1162] 2021-11-09 06:35:09,242 >>   Total optimization steps = 5625\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.518 algo-1:32 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.687 algo-1:32 INFO profiler_config_parser.py:102] User has disabled profiler.\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.688 algo-1:32 INFO json_config.py:91] Creating hook from json_config at /opt/ml/input/config/debughookconfig.json.\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.689 algo-1:32 INFO hook.py:201] tensorboard_dir has not been set for the hook. SMDebug will not be exporting tensorboard summaries.\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.691 algo-1:32 INFO hook.py:255] Saving to /opt/ml/output/tensors\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.691 algo-1:32 INFO state_store.py:77] The checkpoint config file /opt/ml/input/config/checkpointconfig.json does not exist.\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.848 algo-1:32 INFO hook.py:591] name:module.roberta.embeddings.word_embeddings.weight count_params:51471360\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.848 algo-1:32 INFO hook.py:591] name:module.roberta.embeddings.position_embeddings.weight count_params:526336\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.849 algo-1:32 INFO hook.py:591] name:module.roberta.embeddings.token_type_embeddings.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.849 algo-1:32 INFO hook.py:591] name:module.roberta.embeddings.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.849 algo-1:32 INFO hook.py:591] name:module.roberta.embeddings.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.849 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.0.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.849 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.0.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.849 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.0.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.849 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.0.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.850 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.0.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.850 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.0.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.850 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.0.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.850 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.0.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.850 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.0.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.850 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.0.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.850 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.0.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.850 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.0.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.851 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.0.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.851 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.0.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.851 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.0.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.851 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.0.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.851 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.1.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.851 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.1.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.851 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.1.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.851 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.1.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.851 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.1.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.852 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.1.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.852 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.1.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.852 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.1.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.852 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.1.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.852 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.1.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.852 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.1.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.852 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.1.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.852 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.1.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.853 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.1.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.853 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.1.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.853 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.1.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.853 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.2.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.853 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.2.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.853 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.2.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.853 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.2.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.854 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.2.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.854 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.2.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.854 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.2.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.854 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.2.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.854 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.2.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.854 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.2.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.854 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.2.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.854 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.2.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.854 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.2.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.854 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.2.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.855 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.2.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.855 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.2.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.855 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.3.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.855 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.3.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.855 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.3.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.855 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.3.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.855 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.3.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.855 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.3.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.855 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.3.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.856 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.3.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.856 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.3.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.856 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.3.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.856 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.3.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.856 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.3.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.856 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.3.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.856 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.3.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.857 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.3.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.857 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.3.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.857 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.4.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.857 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.4.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.857 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.4.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.857 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.4.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.857 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.4.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.857 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.4.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.857 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.4.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.857 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.4.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.858 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.4.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.858 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.4.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.858 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.4.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.858 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.4.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.858 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.4.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.858 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.4.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.858 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.4.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.858 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.4.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.859 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.5.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.859 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.5.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.859 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.5.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.859 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.5.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.859 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.5.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.859 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.5.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.859 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.5.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.859 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.5.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.859 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.5.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.860 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.5.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.860 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.5.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.860 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.5.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.860 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.5.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.860 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.5.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.860 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.5.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.860 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.5.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.860 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.6.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.860 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.6.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.860 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.6.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.860 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.6.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.861 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.6.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.861 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.6.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.861 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.6.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.861 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.6.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.861 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.6.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.861 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.6.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.861 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.6.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.861 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.6.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.861 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.6.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.862 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.6.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.862 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.6.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.862 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.6.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.862 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.7.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.862 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.7.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.862 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.7.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.862 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.7.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.862 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.7.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.862 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.7.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.862 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.7.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.863 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.7.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.863 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.7.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.863 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.7.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.863 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.7.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.863 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.7.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.863 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.7.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.863 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.7.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.863 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.7.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.863 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.7.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.863 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.8.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.863 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.8.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.864 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.8.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.864 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.8.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.864 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.8.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.864 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.8.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.864 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.8.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.864 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.8.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.864 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.8.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.864 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.8.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.864 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.8.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.864 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.8.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.864 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.8.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.865 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.8.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.865 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.8.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.865 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.8.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.865 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.9.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.865 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.9.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.865 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.9.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.865 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.9.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.865 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.9.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.865 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.9.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.865 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.9.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.866 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.9.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.866 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.9.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.866 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.9.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.866 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.9.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.866 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.9.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.866 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.9.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.866 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.9.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.866 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.9.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.866 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.9.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.867 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.10.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.867 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.10.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.867 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.10.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.867 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.10.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.867 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.10.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.867 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.10.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.867 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.10.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.867 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.10.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.867 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.10.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.868 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.10.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.868 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.10.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.868 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.10.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.868 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.10.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.868 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.10.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.868 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.10.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.868 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.10.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.868 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.11.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.868 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.11.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.868 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.11.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.869 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.11.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.869 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.11.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.869 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.11.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.869 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.11.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.869 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.11.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.869 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.11.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.869 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.11.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.869 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.11.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.869 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.11.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.870 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.11.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.870 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.11.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.870 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.11.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.870 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.11.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.870 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.12.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.870 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.12.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.870 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.12.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.870 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.12.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.870 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.12.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.870 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.12.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.870 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.12.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.871 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.12.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.871 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.12.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.871 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.12.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.871 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.12.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.871 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.12.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.871 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.12.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.871 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.12.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.871 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.12.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.872 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.12.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.872 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.13.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.872 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.13.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.872 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.13.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.872 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.13.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.872 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.13.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.872 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.13.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.872 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.13.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.872 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.13.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.872 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.13.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.873 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.13.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.873 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.13.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.873 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.13.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.873 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.13.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.873 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.13.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.873 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.13.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.873 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.13.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.873 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.14.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.873 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.14.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.874 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.14.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.874 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.14.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.874 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.14.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.874 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.14.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.874 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.14.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.874 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.14.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.874 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.14.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.874 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.14.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.874 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.14.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.874 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.14.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.874 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.14.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.875 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.14.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.875 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.14.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.875 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.14.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.875 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.15.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.875 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.15.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.875 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.15.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.875 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.15.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.875 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.15.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.875 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.15.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.875 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.15.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.875 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.15.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.875 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.15.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.876 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.15.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.876 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.15.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.876 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.15.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.876 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.15.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.876 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.15.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.876 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.15.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.876 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.15.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.876 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.16.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.876 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.16.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.876 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.16.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.876 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.16.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.876 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.16.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.877 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.16.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.877 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.16.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.877 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.16.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.877 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.16.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.877 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.16.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.877 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.16.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.877 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.16.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.877 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.16.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.877 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.16.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.877 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.16.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.878 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.16.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.878 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.17.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.878 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.17.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.878 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.17.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.878 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.17.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.878 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.17.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.878 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.17.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.878 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.17.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.878 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.17.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.878 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.17.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.878 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.17.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.879 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.17.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.879 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.17.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.879 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.17.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.879 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.17.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.879 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.17.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.879 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.17.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.879 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.18.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.879 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.18.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.880 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.18.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.880 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.18.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.880 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.18.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.880 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.18.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.880 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.18.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.880 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.18.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.880 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.18.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.880 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.18.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.880 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.18.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.880 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.18.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.880 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.18.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.881 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.18.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.881 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.18.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.881 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.18.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.881 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.19.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.881 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.19.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.881 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.19.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.881 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.19.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.881 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.19.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.881 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.19.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.882 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.19.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.882 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.19.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.882 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.19.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.882 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.19.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.882 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.19.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.882 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.19.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.882 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.19.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.882 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.19.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.882 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.19.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.882 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.19.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.882 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.20.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.883 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.20.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.883 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.20.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.883 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.20.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.883 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.20.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.883 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.20.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.883 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.20.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.883 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.20.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.883 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.20.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.883 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.20.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.883 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.20.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.883 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.20.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.884 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.20.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.884 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.20.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.884 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.20.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.884 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.20.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.884 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.21.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.884 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.21.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.884 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.21.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.884 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.21.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.884 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.21.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.884 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.21.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.884 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.21.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.885 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.21.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.885 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.21.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.885 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.21.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.885 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.21.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.885 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.21.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.885 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.21.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.885 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.21.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.886 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.21.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.886 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.21.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.886 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.22.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.886 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.22.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.886 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.22.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.886 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.22.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.886 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.22.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.886 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.22.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.887 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.22.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.887 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.22.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.887 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.22.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.887 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.22.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.887 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.22.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.887 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.22.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.887 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.22.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.887 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.22.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.887 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.22.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.887 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.22.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.888 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.23.attention.self.query.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.888 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.23.attention.self.query.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.888 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.23.attention.self.key.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.888 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.23.attention.self.key.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.888 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.23.attention.self.value.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.888 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.23.attention.self.value.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.888 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.23.attention.output.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.889 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.23.attention.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.889 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.23.attention.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.889 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.23.attention.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.889 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.23.intermediate.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.889 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.23.intermediate.dense.bias count_params:4096\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.889 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.23.output.dense.weight count_params:4194304\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.889 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.23.output.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.889 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.23.output.LayerNorm.weight count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.889 algo-1:32 INFO hook.py:591] name:module.roberta.encoder.layer.23.output.LayerNorm.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.890 algo-1:32 INFO hook.py:591] name:module.classifier.dense.weight count_params:1048576\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.890 algo-1:32 INFO hook.py:591] name:module.classifier.dense.bias count_params:1024\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.890 algo-1:32 INFO hook.py:591] name:module.classifier.out_proj.weight count_params:31744\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.890 algo-1:32 INFO hook.py:591] name:module.classifier.out_proj.bias count_params:31\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.890 algo-1:32 INFO hook.py:593] Total Trainable Params: 355391519\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.890 algo-1:32 INFO hook.py:425] Monitoring the collections: losses\u001B[0m\n",
      "\u001B[34m[2021-11-09 06:35:09.893 algo-1:32 INFO hook.py:488] Hook is writing from the hook with pid: 32\u001B[0m\n",
      "\u001B[34mNCCL version 2.7.8+cuda11.0\u001B[0m\n",
      "\u001B[34m{'loss': 1.4664, 'learning_rate': 4.555555555555556e-05, 'epoch': 0.09}\u001B[0m\n",
      "\u001B[34m[INFO|trainer.py:1885] 2021-11-09 07:03:01,933 >> Saving model checkpoint to /opt/ml/model/checkpoint-500\u001B[0m\n",
      "\u001B[34m[INFO|configuration_utils.py:351] 2021-11-09 07:03:01,935 >> Configuration saved in /opt/ml/model/checkpoint-500/config.json\u001B[0m\n",
      "\u001B[34m[INFO|modeling_utils.py:889] 2021-11-09 07:03:05,918 >> Model weights saved in /opt/ml/model/checkpoint-500/pytorch_model.bin\u001B[0m\n",
      "\u001B[34m[INFO|tokenization_utils_base.py:1924] 2021-11-09 07:03:05,920 >> tokenizer config file saved in /opt/ml/model/checkpoint-500/tokenizer_config.json\u001B[0m\n",
      "\u001B[34m[INFO|tokenization_utils_base.py:1930] 2021-11-09 07:03:05,920 >> Special tokens file saved in /opt/ml/model/checkpoint-500/special_tokens_map.json\u001B[0m\n",
      "\u001B[34m{'loss': 1.1723, 'learning_rate': 4.111111111111111e-05, 'epoch': 0.18}\u001B[0m\n",
      "\u001B[34m[INFO|trainer.py:1885] 2021-11-09 07:31:07,186 >> Saving model checkpoint to /opt/ml/model/checkpoint-1000\u001B[0m\n",
      "\u001B[34m[INFO|configuration_utils.py:351] 2021-11-09 07:31:07,188 >> Configuration saved in /opt/ml/model/checkpoint-1000/config.json\u001B[0m\n",
      "\u001B[34m[INFO|modeling_utils.py:889] 2021-11-09 07:31:11,198 >> Model weights saved in /opt/ml/model/checkpoint-1000/pytorch_model.bin\u001B[0m\n",
      "\u001B[34m[INFO|tokenization_utils_base.py:1924] 2021-11-09 07:31:11,199 >> tokenizer config file saved in /opt/ml/model/checkpoint-1000/tokenizer_config.json\u001B[0m\n",
      "\u001B[34m[INFO|tokenization_utils_base.py:1930] 2021-11-09 07:31:11,199 >> Special tokens file saved in /opt/ml/model/checkpoint-1000/special_tokens_map.json\u001B[0m\n",
      "\u001B[34m{'loss': 1.0787, 'learning_rate': 3.6666666666666666e-05, 'epoch': 0.27}\u001B[0m\n",
      "\u001B[34m[INFO|trainer.py:1885] 2021-11-09 07:59:11,083 >> Saving model checkpoint to /opt/ml/model/checkpoint-1500\u001B[0m\n",
      "\u001B[34m[INFO|configuration_utils.py:351] 2021-11-09 07:59:11,085 >> Configuration saved in /opt/ml/model/checkpoint-1500/config.json\u001B[0m\n",
      "\u001B[34m[INFO|modeling_utils.py:889] 2021-11-09 07:59:15,008 >> Model weights saved in /opt/ml/model/checkpoint-1500/pytorch_model.bin\u001B[0m\n",
      "\u001B[34m[INFO|tokenization_utils_base.py:1924] 2021-11-09 07:59:15,009 >> tokenizer config file saved in /opt/ml/model/checkpoint-1500/tokenizer_config.json\u001B[0m\n",
      "\u001B[34m[INFO|tokenization_utils_base.py:1930] 2021-11-09 07:59:15,009 >> Special tokens file saved in /opt/ml/model/checkpoint-1500/special_tokens_map.json\u001B[0m\n"
     ]
    }
   ],
   "source": [
    "huggingface_estimator.fit({'train':'s3://sagemaker-us-west-2-847380964353/hp-datalab/train/train.csv','test':'s3://sagemaker-us-west-2-847380964353/hp-datalab/test/test.csv'})\n"
   ]
  },
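  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`fit()` blocks until the training job finishes and the packed model artifact (`model.tar.gz`) has been uploaded to S3. A minimal sketch of how to recover that artifact location, assuming `huggingface_estimator` is still in memory; this is the same URI that the `model_data` argument of `HuggingFaceModel` below expects:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# S3 URI of the model artifact produced by the completed training job\n",
    "print(huggingface_estimator.model_data)"
   ],
   "metadata": {}
  },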
  {
   "cell_type": "markdown",
   "id": "aa7b8505",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "## deploy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "from sagemaker.huggingface import HuggingFaceModel\n",
    "import sagemaker\n",
    "\n",
    "role = sagemaker.get_execution_role()\n",
    "\n",
    "# create Hugging Face Model Class\n",
    "huggingface_model = HuggingFaceModel(\n",
    "   model_data=\"s3://sagemaker-us-west-2-847380964353/xlm-roberta-base-epoch1-2021-11-08-08-17-03-658/output/model.tar.gz\",  # path to your trained sagemaker model\n",
    "   role=role, # iam role with permissions to create an Endpoint\n",
    "   transformers_version=\"4.6\", # transformers version used\n",
    "   pytorch_version=\"1.7\", # pytorch version used\n",
    "   py_version=\"py36\", # python version of the DLC\n",
    ")"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
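  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If the training cell above ran in this same session, you can skip the hardcoded S3 path and point the model directly at the estimator's output. A sketch, assuming `huggingface_estimator` is still defined:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# alternative: reuse the artifact location from the estimator\n",
    "# instead of hardcoding the S3 path\n",
    "huggingface_model = HuggingFaceModel(\n",
    "    model_data=huggingface_estimator.model_data,\n",
    "    role=role,\n",
    "    transformers_version=\"4.6\",\n",
    "    pytorch_version=\"1.7\",\n",
    "    py_version=\"py36\",\n",
    ")"
   ],
   "metadata": {}
  },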
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "predictor = huggingface_model.deploy(\n",
    "   initial_instance_count=1,\n",
    "   instance_type=\"ml.g4dn.xlarge\"\n",
    ")"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# example request, you always need to define \"inputs\"\n",
    "data = {\n",
    "   \"inputs\": \"The new Hugging Face SageMaker DLC makes it super easy to deploy models in production. I love it!\"\n",
    "}\n",
    "\n",
    "# request\n",
    "predictor.predict(data)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
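  ,
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "When you are done experimenting, tear the endpoint down so the `ml.g4dn.xlarge` instance stops accruing charges. A cleanup sketch using the standard predictor API:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# delete the real-time endpoint (and its endpoint config) created by deploy()\n",
    "predictor.delete_endpoint()"
   ],
   "metadata": {}
  }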
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "conda_pytorch_latest_p36",
   "language": "python",
   "name": "conda_pytorch_latest_p36"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}